Supplemental material for ‘Cue reliability, communicative efficiency, and differential subject marking: Evidence from Korean’, by Hanjung Lee. Language 100(3).468–504, 2024.


---
title: "R code for mixed effects logistic regression"
output:
  html_document: default
  word_document: default
  pdf_document: default
editor_options: 
  chunk_output_type: inline
---

```{r setup, include=FALSE}
# Default chunk option for the whole document: show each chunk's source code
# alongside its output.
knitr::opts_chunk$set(echo = TRUE)
```


```{r,message=FALSE}
library(tidyverse)
library(data.table)
library(lme4)
library(MCMCglmm)
```

```{r 0 code}

# Load the data set, reading every column as text; the columns used in the
# analysis are converted explicitly below.
#
# `colClasses = "character"` applies to all columns. The earlier form,
# `list(character = colnames(data))`, referred to `data` inside the very call
# that creates it, so in a fresh session the intended typing was not applied.
#
# NOTE: the path is absolute and machine-specific; adjust it before running.
data <- fread(
  "/Users/fran/Downloads/lg_data_comp.csv",
  sep = ",",
  encoding = "UTF-8",
  # drop = 1,
  colClasses = "character"
)
str(data)

# Predictors are treated as categorical.
data$Grounding <- as.factor(data$Grounding)
data$Tense <- as.factor(data$Tense)
data$Stativity <- as.factor(data$Stativity)

# The response is converted to numeric; it is later used as the outcome of a
# binomial model. NOTE(review): presumably coded 0/1 -- confirm against the
# data file (non-numeric entries would become NA here).
data$`Subject marking` <- as.numeric(data$`Subject marking`)

### 1. Visualization

# Draw one bar chart of value counts for every column of `data` and arrange
# the charts on a single grid.
#
# Args:
#   data:    a data frame; each column is tabulated and plotted separately.
#   row_num: number of grid rows, passed as `nrow` to gridExtra::grid.arrange().
#
# Returns: the value of gridExtra::grid.arrange() (the grid is drawn as a
#   side effect).
#
# Side effects: sets the session-wide ggplot2 default theme via theme_set().
draw_barplot <- function(data, row_num) {
  col_names <- colnames(data)
  if (length(col_names) == 0) {
    stop("`data` must have at least one column to plot.", call. = FALSE)
  }

  # Loop-invariant, so set once instead of on every iteration.
  # NOTE(review): each plot below adds theme_classic(), a complete theme, so
  # this base_family presumably does not reach the plots -- confirm whether
  # the font was meant to be passed to theme_classic() instead.
  theme_set(theme_grey(base_family = "NanumGothic"))

  # Build one grob per column; lapply avoids growing a list inside a loop and
  # is safe for any number of columns.
  plots <- lapply(col_names, function(col_name) {
    p <- data %>%
      group_by(.data[[col_name]]) %>%
      summarise(count = n()) %>%
      ggplot(aes(
        x = .data[[col_name]],
        y = count,
        fill = .data[[col_name]],
        color = .data[[col_name]]
      )) +
      geom_bar(stat = "identity", alpha = 0.25) +
      labs(title = paste0("Distribution of ", col_name), x = "") +
      theme_classic() +
      theme(
        plot.title = element_text(size = 5, face = "bold.italic"),
        panel.background = element_rect(fill = "white", color = "black")
      )
    ggplotGrob(p)
  })

  gridExtra::grid.arrange(grobs = plots, nrow = row_num)
}

# Plot the distribution of every column except the Item and Verb columns,
# laid out on three rows.
data %>%
  select(-c(Item, Verb)) %>%
  draw_barplot(row_num = 3)

```


```{r 1 code}
# Visualization: each fixed-effect predictor against Subject marking.

# Dodged bar chart of the counts of `predictor`, grouped and filled by
# `Subject marking`.
#   df:        data frame holding `predictor` and `Subject marking`.
#   predictor: name of the predictor column, as a string.
plot_by_subject_marking <- function(df, predictor) {
  ggplot(
    df,
    aes(.data[[predictor]], group = `Subject marking`, fill = `Subject marking`)
  ) +
    geom_bar(stat = "count", position = "dodge") +
    labs(title = paste0("Distribution of ", predictor), x = "") +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5))
}

# Called at top level so that each plot is printed automatically.
plot_by_subject_marking(data, "Grounding")

plot_by_subject_marking(data, "Tense")

plot_by_subject_marking(data, "Stativity")


# Cross-tabulated counts of each predictor against Subject marking.
print("Grounding")
table(data[["Grounding"]], data[["Subject marking"]])

print("Tense")
table(data[["Tense"]], data[["Subject marking"]])

print("Stativity")
table(data[["Stativity"]], data[["Subject marking"]])
```

```{r 2 code}
# 2. Modelling
#
# Mixed-effects logistic regression: Subject marking is modelled from the full
# factorial of Grounding, Tense and Stativity, with random intercepts for
# Speaker and for Verb. The Item column is dropped from the data passed in.
fit <- glmer(
  `Subject marking` ~ Grounding * Tense * Stativity +
    (1 | Speaker) +
    (1 | Verb),
  data = data %>% select(-Item),
  family = binomial,
  control = glmerControl(optimizer = "bobyqa")
)


# 3. Result
#
# The outer parentheses print the summary as well as storing it.
(fit_table <- summary(fit))

# Random intercepts per grouping level.
ranef(fit)
```

```{r 3 code}

# Standard errors of the fixed effects: square roots of the diagonal of the
# fixed-effect covariance matrix.
fixed_vcov <- vcov(fit)
se <- sqrt(diag(fixed_vcov))

# Numeric values taken from the variance-component part of the model summary.
# NOTE(review): the name suggests standard errors, but these come straight
# from `varcor`; the object is not used again in this file -- confirm intent.
se_rand <- as.numeric(fit_table$varcor)


# Table of fixed-effect estimates with 95% intervals (estimate +/- 1.96 * SE);
# H0: beta = 0. The outer parentheses print the table as well as storing it.
(tab <- cbind(
  Est = fixef(fit),
  LL = fixef(fit) - 1.96 * se,
  UL = fixef(fit) + 1.96 * se
))


# MCMC fit with Speaker and Verb as random terms.
# NOTE(review): no `family` is given, so the package default applies, and the
# fixed part is additive whereas the glmer model above uses the full
# interaction -- confirm both are intended. No random seed is set, so results
# vary between runs.
mm <- MCMCglmm(
  `Subject marking` ~ Grounding + Tense + Stativity,
  random = ~ Speaker + Verb,
  data = data
)
summary(mm)
```

```{r 4 code}
# Print the coefficients for each random factor.
# For each grouping factor, take the first column of coef(fit) together with
# the level names.

verb_res <- coef(fit)$Verb
verb_res <- data.frame(
  name = rownames(verb_res),
  coef_res = verb_res[, 1]
)
verb_res

speak_res <- coef(fit)$Speaker
speak_res <- data.frame(
  name = rownames(speak_res),
  coef_res = speak_res[, 1]
)
speak_res

# To save the results as csv files, run the code below.

#write.csv(verb_res,"verb_result.csv")
#write.csv(speak_res,"Speaker_result.csv")

```

```{r 5 code}
# 4. Diagnostics
# NOTE(review): normality and constant-variance checks are the usual
# linear-model diagnostics, while `fit` is a binomial glmer -- confirm how
# these results should be interpreted.

# (1) Normality

qqnorm(resid(fit))
shapiro.test(resid(fit)) # H0: normality holds


# (2) Constant variance

plot(fit)

# (3) Outlier checking
# Standardize the residuals by their standard deviation, sqrt(MSE). Dividing
# by the variance itself (as before) does not put the residuals on a
# standard-deviation scale, so the |.| > 3 rule was not applied as intended.

MSE <- var(resid(fit))
which_index <- which(abs(resid(fit) / sqrt(MSE)) > 3) # rows flagged as outliers
data[which_index, ]

# (4) Independence test

print('---------------------------------')
print('독립성 검정')
car::durbinWatsonTest(resid(fit)) # independence
```